{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import sys\n",
    "IN_COLAB = 'google.colab' in sys.modules\n",
    "if IN_COLAB:\n",
    "    !git clone https://github.com/cs357/demos-cs357.git\n",
    "    !mv demos-cs357/figures/ .\n",
    "    !mv demos-cs357/additional_files/ ."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import numpy.linalg as la\n",
    "import matplotlib.pyplot as plt\n",
    "import scipy.sparse as sparse\n",
    "\n",
    "# print(plt.style.available) # uncomment to print all styles\n",
    "import seaborn as sns\n",
    "sns.set(font_scale=2)\n",
    "plt.style.use('seaborn-whitegrid')\n",
    "plt.rcParams['figure.figsize'] = (10,10)\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<img src=\"figures\\sparse.png\" alt=\"Sparse\" style=\"width: 300px;\"/>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Create a Sparse Matrix in COO"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "data = [1.9, -5.2, 4.4, 5.8, 3.6, 7.2, 2.7]\n",
    "i    = [  0,    0,    2,   2,   2,   3,   3]\n",
    "j    = [  1,    3,    0,   1,   2,   2,   3]\n",
    "A = sparse.coo_matrix((data, (i, j)))\n",
    "\n",
    "print(A)\n",
    "print(A.todense())\n",
    "\n",
    "A = A.tocsr()\n",
    "print(A.data)\n",
    "print(A.indptr)\n",
    "print(A.indices)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def bmatrix(a):\n",
    "    \"\"\"Returns a LaTeX bmatrix\n",
    "\n",
    "    :a: numpy array\n",
    "    :returns: LaTeX bmatrix as a string\n",
    "    \"\"\"\n",
    "    if len(a.shape) > 2:\n",
    "        raise ValueError('bmatrix can at most display two dimensions')\n",
    "    lines = str(a).replace('[', '').replace(']', '').splitlines()\n",
    "    rv = [r'\\begin{bmatrix}']\n",
    "    rv += ['  ' + ' & '.join(l.split()) + r'\\\\' for l in lines]\n",
    "    rv +=  [r'\\end{bmatrix}']\n",
    "    return '\\n'.join(rv)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "print(bmatrix(A.indices))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "$$data = \n",
    "\\begin{bmatrix}\n",
    "  1.9 & -5.2 & 4.4 & 5.8 & 3.6 & 7.2 & 2.7\\\\\n",
    "\\end{bmatrix}$$\n",
    "\n",
    "$$rowptr = \n",
    "\\begin{bmatrix}\n",
    "  0 & 2 & 2 & 5 & 7\\\\\n",
    "\\end{bmatrix}\n",
    "$$\n",
    "\n",
    "$$col = \n",
    "\\begin{bmatrix}\n",
    "  1 & 3 & 0 & 1 & 2 & 2 & 3\\\\\n",
    "\\end{bmatrix}\n",
    "$$"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "$$A  = \\begin{bmatrix}\n",
    "  0. & 1.9 & 0. & -5.2\\\\\n",
    "  0. & 0. & 0. & 0.\\\\\n",
    "  4.4 & 5.8 & 3.6 & 0.\\\\\n",
    "  0. & 0. & 7.2 & 2.7\\\\\n",
    "\\end{bmatrix}$$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "data = [-5.2, 1.9, 0.3, 9.1, 4.4, 5.8, 3.6, 7.2, 2.7]\n",
    "i    = [   0,   0,   1,   1,   2,   2,   2,   3,   3]\n",
    "j    = [   3,   1,   0,   2,   0,   1,   2,   2,   3]\n",
    "A = sparse.coo_matrix((data, (i, j)))\n",
    "print(A.todense())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "print(A.data)\n",
    "print(A.data.dtype, 'Length: ', len(A.data))\n",
    "print('-')\n",
    "print(A.row)\n",
    "print(A.row.dtype, 'Length: ', len(A.row))\n",
    "print('-')\n",
    "print(A.col)\n",
    "print(A.col.dtype, 'Length: ', len(A.row))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Convert to CSR"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "A = A.tocsr()\n",
    "print(A)\n",
    "print(A.todense())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "print(A.data)\n",
    "print(A.data.dtype, 'Length: ', len(A.data))\n",
    "print('-')\n",
    "print(A.indptr)\n",
    "print(A.indptr.dtype, 'Length: ', len(A.indptr))\n",
    "print('-')\n",
    "print(A.indices)\n",
    "print(A.indices.dtype, 'Length: ', len(A.indices))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Try some timings: small, `Harvard500`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import scipy.io as sio\n",
    "d = sio.loadmat('./additional_files/Harvard500.mat')\n",
    "A = d['Problem'][0][0][2].tocsr()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "A"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "plt.figure(figsize=(10,10))\n",
    "plt.spy(A, ms=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "A.shape[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "v = np.random.rand(A.shape[0])\n",
    "w = np.random.rand(A.shape[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "%timeit v = A * w"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "Adense = A.todense()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "%timeit v = Adense.dot(w)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Medium `wb-cs-stanford`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "d = sio.loadmat('./additional_files/wb-cs-stanford.mat')\n",
    "A = d['Problem'][0][0][2].tocsr()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "plt.figure(figsize=(10,10))\n",
    "plt.spy(A, ms=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "A"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "v = np.random.rand(A.shape[0])\n",
    "w = np.random.rand(A.shape[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "%timeit v = A * w"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "Adense = A.todense()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "%timeit v = Adense.dot(w)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Large `email-Enron`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "d = sio.loadmat('./additional_files/email-Enron.mat')\n",
    "A = d['Problem'][0][0][2].tocsr()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "plt.figure(figsize=(10,10))\n",
    "plt.spy(A, ms=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "A"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "v = np.random.rand(A.shape[0])\n",
    "w = np.random.rand(A.shape[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "%timeit v = A * w"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "Adense = A.todense()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "%timeit v = Adense.dot(w)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Does 36692 sound \"large\"?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
